In the following all deconvolution tools for spaceDeconv are tested and run once to showcase and test all tools.

Imports and Python Setup

library(reticulate)
library(TENxVisiumData) # data
library(TabulaMurisSenisData) # data
library(ggspavis) # visualization
library(SPOTlight)
# setup python
#reticulate::use_condaenv("cell2loc_env")
#scanpy <- reticulate::import("scanpy")
#reticulate::py_available()

Datasets

We are using two sample datasets from mouse kidneys.

# Spatial
sp <- TENxVisiumData::MouseKidneyCoronal()
rownames(sp) <- rowData(sp)$symbol # overwrite EnsemblID

# Single Cell 
sc <- TabulaMurisSenisData::TabulaMurisSenisDroplet(tissues= "Kidney")$Kidney

Let’s have a look inside the datasets. The console output shows the available cell types and the corresponding number of available cells. The Plot visualizes the number of gene counts per spot and the corresponding Visium Image.

1m 3m 18m 21m 24m 30m
CD45 11 32 76 54 1010 106
CD45 B cell 7 5 45 54 2322 62
CD45 NK cell 1 4 8 2 47 4
CD45 T cell 8 18 48 42 846 177
CD45 macrophage 59 132 254 101 259 514
CD45 plasma cell 0 0 9 12 169 42
Epcam kidney distal convoluted tubule epithelial cell 78 126 169 153 0 131
Epcam brush cell 52 15 78 169 0 31
Epcam kidney collecting duct principal cell 77 110 132 58 0 370
Epcam kidney proximal convoluted tubule epithelial cell 945 684 1120 868 0 817
Epcam podocyte 92 94 64 66 0 170
Epcam proximal tube epithelial cell 25 195 296 5 0 1977
Epcam thick ascending tube S epithelial cell 465 312 248 228 0 313
Pecam Kidney cortex artery cell 75 78 91 69 0 115
Pecam fenestrated capillary endothelial 164 182 164 134 0 211
Pecam kidney capillary endothelial cell 49 38 25 18 0 7
Stroma fibroblast 15 16 37 13 0 80
Stroma kidney mesangial cell 80 51 18 22 0 7
nan 285 238 256 189 1068 579

Preprocessing

We subset the cells to remove unsuitable cells and NAs. We further subset cells for performance reasons. In a full run this step will be skipped.

This section will further include normalization steps.

# We are only using data from 18m old cells
sc <- sc[, sc$age=="18m"]
sc <- sc[, !sc$free_annotation %in% c("nan", "CD45")] # remove false data

# downsampling for performance reasons, copied from MarcElosua/SPOTlight
idx <- split(seq(ncol(sc)), sc$free_annotation)
n_cells <- 50
cs_keep <- lapply(idx, function(i) {
    n <- length(i)
    if (n < n_cells)
        n_cells <- n
    sample(i, n_cells)
})
sc <- sc[, unlist(cs_keep)]

SPOTlight

Marker Genes are necessary for SPOTlight. Here i follow the guideline presented on their website. This could be done differently. A vector of highly variable Genes is also helpful but not required.

# SPOTligth parameters
slot <- "cell_ontology_class" # where are the cell types stored, find better term for "slot"
mgs <- NULL # DataFrame of scored Marker genes
# + obviously the single cell and spatial dataset
colLabels(sc) <- colData(sc)[[slot]] # add Labels

# norm 
sc <- scuttle::logNormCounts(sc) # score marksers

mgs <- scran::scoreMarkers(sc) # can include a subset step, requires logcounts slot/assay?

# this code is from the vignette
# one could perform this differently but the final dataframe needs to be present
mgs_fil <- lapply(names(mgs), function(i) {
  x <- mgs[[i]]
  # Filter and keep relevant marker genes, those with AUC > 0.8
  x <- x[x$mean.AUC > 0.8, ]
  # Sort the genes from highest to lowest weight
  x <- x[order(x$mean.AUC, decreasing = TRUE), ]
  # Add gene and cluster id to the dataframe
  x$gene <- rownames(x)
  x$cluster <- i
  data.frame(x)
})
mgs_df <- do.call(rbind, mgs_fil)

deconvolution <- SPOTlight::SPOTlight(sc, sp, mgs=mgs_df, weight_id = "mean.AUC", verbose=TRUE)
## Warning in .set_groups_if_null(x): Grouping cells into celltypes
##                     by SingleCellExperiment::colLabels(x)
## Scaling count matrix
## Seeding initial matrices
## Training NMF model
## Time for training: 1.45min
## Deconvoluting mixture data
deconvolution <- deconvolution$mat # extract the result
knitr::kable(head(deconvolution[, 4:6])) # just a subset
fenestrated cell fibroblast kidney capillary endothelial cell
AAACCGTTCGTCCAGG-1 0.1181604 0.0753071 0.2784810
AAACCTAAGCAGCCGG-1 0.0000000 0.0000000 0.0000000
AAACGAGACGGTTGAT-1 0.0000000 0.0000000 0.0000000
AAACGGTTGCGAACTG-1 0.0000000 0.0000000 0.0000000
AAACTCGGTTCGCAAT-1 0.0592537 0.0498360 0.2183969
AAACTGCTGGCTCCAA-1 0.0529544 0.0775426 0.0284172
# adding the results to colData
colnames(deconvolution) <- make.names(colnames(deconvolution))

for (i in colnames(deconvolution)){ # each separately
  sp[[i]] <- deconvolution[, i] 
}

# the colData annotation of the spatialExperiment now looks like this:
names(colData(sp))
##  [1] "sample_id"                                                
##  [2] "nCounts"                                                  
##  [3] "B.cell"                                                   
##  [4] "brush.cell"                                               
##  [5] "epithelial.cell.of.proximal.tubule"                       
##  [6] "fenestrated.cell"                                         
##  [7] "fibroblast"                                               
##  [8] "kidney.capillary.endothelial.cell"                        
##  [9] "kidney.collecting.duct.principal.cell"                    
## [10] "kidney.cortex.artery.cell"                                
## [11] "kidney.distal.convoluted.tubule.epithelial.cell"          
## [12] "kidney.loop.of.Henle.thick.ascending.limb.epithelial.cell"
## [13] "kidney.mesangial.cell"                                    
## [14] "kidney.proximal.convoluted.tubule.epithelial.cell"        
## [15] "macrophage"                                               
## [16] "NK.cell"                                                  
## [17] "plasma.cell"                                              
## [18] "podocyte"                                                 
## [19] "T.cell"

RCTD (spacexr)

library(spacexr)
sc2 <- sc # using a copy in this case because we are removing cells

# first subset the sc reference, every cell type needs to be present more than 25 times!!!!!
cellTable <- as.data.frame(table(sc2[["cell_ontology_class"]])) # table of all factors and their count
cellTable <- cellTable[cellTable$Freq>=25,] # drop all below 25
cellsToKeep <- as.character(cellTable$Var1) # character vector for subsetting

sc2 <- sc2[, sc2[["cell_ontology_class"]] %in% cellsToKeep] # subset cells (columns)
sc2[["cell_ontology_class"]] <- droplevels(sc2[["cell_ontology_class"]]) # drop zero levels in factor

Construct Single Cell Reference

# counts as dgC 
counts <- as(counts(sc2), "dgCMatrix")  # convert from delayed matrix to dgC

# named cell type factor
cell_types <- colData(sc2)[, "cell_ontology_class"] # get cell type factor
names(cell_types) <- rownames(colData(sc2)) # name it!
cell_types <- as.factor(cell_types) # ensure factor

# named Umi Count
nUMI <- colData(sc2)[, "n_counts"]
names(nUMI) <- rownames(colData(sc2)) # name it!

# create Reference
reference <- spacexr::Reference(counts, cell_types, nUMI)

Construct Spatial Dataset

coords <- SpatialExperiment::spatialCoords(sp) # spatial coordinates
coords <- as.data.frame(coords)
counts <- counts(sp) # counts
nUMI <- colData(sp)[, "nCounts"] 
# all three above are already named but that does not have to be the case

# in this specific case we must make gene names unique! There are duplicates
rownames(counts) <- make.names(rownames(counts), unique=T)

puck <- spacexr::SpatialRNA(coords, counts, nUMI)

Create RCTD Object and run

RCTDObject <- spacexr::create.RCTD(puck, reference, max_cores = 20, UMI_min = 0) # NOTE UMI_min = 0!!!!
## 
##                                                    B cell 
##                                                        43 
##                                                    T cell 
##                                                        48 
##                                                brush cell 
##                                                        50 
##                        epithelial cell of proximal tubule 
##                                                        50 
##                                          fenestrated cell 
##                                                        50 
##                                                fibroblast 
##                                                        36 
##                     kidney collecting duct principal cell 
##                                                        50 
##                                 kidney cortex artery cell 
##                                                        43 
##           kidney distal convoluted tubule epithelial cell 
##                                                        50 
## kidney loop of Henle thick ascending limb epithelial cell 
##                                                        50 
##                                     kidney mesangial cell 
##                                                        30 
##         kidney proximal convoluted tubule epithelial cell 
##                                                        50 
##                                                macrophage 
##                                                        51 
##                                                  podocyte 
##                                                        50
RCTDObject <- spacexr::run.RCTD(RCTDObject)
## [1] "gather_results: finished 1000"
results <- RCTDObject@results
norm_weights <- spacexr::normalize_weights(results$weights) # normalize the weights to sum to one
norm_weights <- as.matrix(norm_weights)

Plot Results

omnideconv

Omnideconv does take singleCellExperiments as input! The bulk needs to be a matrix and can be extracted by counts(sp). batch IDs can also be extracted from the singleCellExperiment.

NOTE: This omnideconv example worked but i had some errors with different methods. This is to showcase the overall omnideconv usage in spaceDeconv.

library(omnideconv)
## Warning: vorhergehender Import 'magrittr::extract' durch 'tidyr::extract'
## während des Ladens von 'omnideconv' ersetzt

Single Cell Reference

# works directly with the singleCellExperiment
batch_ids <- colData(sc)$mouse.id # batch id not cell id!!!!

signature <- omnideconv::build_model(sc, method = "scdc", cell_type_column_name = "cell_ontology_class", batch_ids = batch_ids, verbose=TRUE) 

Deconvolution

# this spatial dataset has duplicate rownames
rownames(sp) <- make.names(rownames(sp), unique = T)

#extract bulk matrix
bulk <- counts(sp) # could be different!

# deconvolute
deconvolution <- omnideconv::deconvolute(bulk, signature=signature, method="scdc", verbose=TRUE, single_cell_object=sc, cell_type_column_name = "cell_ontology_class", batch_ids = batch_ids)

Immunedeconv

Note: Immundeconv requires HGNC Symbols. This might not be the case for all datasets so we should add a note/ give options to translate the names???

library(immunedeconv)
# needs to be a real matric or Data Frame
bulk <- counts(sp)
bulk <- as.matrix(bulk) # make.names already called

# HGNC Symbols! In this case just uppercase is enough
rownames(bulk) <- toupper(rownames(bulk))

deconvolution <- immunedeconv::deconvolute(bulk, method="epic")
# move first column to colnames
deconvolution <- as.matrix(deconvolution)
rownames <- deconvolution[, 1]
deconvolution <- deconvolution[, -1]
deconvolution <- apply(deconvolution, 2, as.numeric) # turn into numeric
rownames(deconvolution) <- rownames

# transpose
deconvolution <- t(deconvolution)

colnames(deconvolution) <- make.names(colnames(deconvolution)) # removing the whitespace

for (i in colnames(deconvolution)){ # each separately
  sp[[i]] <- deconvolution[, i] 
}

CARD

library(CARD)
## Warning in .recacheSubclasses(def@className, def, env): undefined subclass
## "numericVector" of class "Mnumeric"; definition not updated
## Registered S3 methods overwritten by 'proxy':
##   method               from    
##   print.registry_field registry
##   print.registry_entry registry
## Registered S3 method overwritten by 'ggforce':
##   method           from 
##   scale_type.units units
## Warning: vorhergehender Import 'RcppML::nmf' durch 'NMF::nmf' während des Ladens
## von 'CARD' ersetzt
spExp <- counts(sp) # sp expression
spCoord <- SpatialExperiment::spatialCoords(sp) %>% as.data.frame() # coords
colnames(spCoord) <- c("x", "y") # need to be x and y

scExp <- counts(sc) %>% as("dgCMatrix") # sc expression matrix
scMeta <- data.frame(cellType = colData(sc)$cell_ontology_class, sampleInfo=colData(sc)$mouse.id, row.names = colnames(sc)) # annotation

cts <- as.vector(unique(sc$cell_ontology_class))

cardObj <- createCARDObject(sc_count = scExp, sc_meta = scMeta, spatial_count = spExp, spatial_location = spCoord, sample.varname = "sampleInfo", ct.varname = "cellType", ct.select = cts, minCountSpot = 0, minCountGene = 0) # last two set to zero that we get results for all spots
## ## QC on scRNASeq dataset! ...
## ## QC on spatially-resolved dataset! ...
cardObj <- CARD_deconvolution(cardObj)
## ## create reference matrix from scRNASeq...
## ## Select Informative Genes! ...
## ## Deconvolution Starts! ...
## ## Deconvolution Finish! ...

Cell2location